This is an attempt to reproduce the anaylses presented in the paper Chaos in a long-term experiment with a plankton community, by Elisa Benincà and others (the paper on the Nature website). Details of the methods are in the Supplement to the Nature paper.
The data are available as an Excel file supplement to an Ecology Letters publication. The Excel file contains several datasheets. Two are particularly important, as they are the source of the raw data (one contains original species abundances, the one with the nutrient concentrations). Another datasheet in the ELE supplement contains transformed variables.
rm(list=ls())
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
library(stringr)
library(ggplot2)
library(RCurl)
## Loading required package: bitops
library(pracma)
library(oce)
## Loading required package: mapproj
## Loading required package: maps
##
## Attaching package: 'oce'
##
## The following objects are masked from 'package:pracma':
##
## detrend, grad
##
## The following object is masked from 'package:tidyr':
##
## extract
library(tseriesChaos)
## Loading required package: deSolve
##
## Attaching package: 'deSolve'
##
## The following object is masked from 'package:pracma':
##
## rk4
library(reshape2)
spp.abund <- read.csv(text=getURL("https://raw.githubusercontent.com/opetchey/RREEBES/master/Beninca_etal_2008_Nature/data/species_abundances_original.csv"), skip=7, header=T)
#spp.abund <- read.csv("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/species_abundances_original.csv", skip=7, header=T)
spp.abund <- select(spp.abund, -X, -X.1)
spp.abund <- spp.abund[-804:-920,]
str(spp.abund)
## 'data.frame': 803 obs. of 12 variables:
## $ Date : Factor w/ 803 levels "","01/02/91",..: 306 440 465 498 520 601 628 673 699 778 ...
## $ Day.number : int 1 6 7 8 9 12 13 15 16 19 ...
## $ Cyclopoids : num 0 0 0.0353 0 0.0353 ...
## $ Calanoid.copepods : num 1.04 2.03 1.72 2.41 1.71 ...
## $ Rotifers : num 7.7 10.19 8.08 6.06 5.94 ...
## $ Protozoa : Factor w/ 330 levels "","0","0,000001",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Nanophytoplankton : num 0.106 0.212 0.212 0.212 0.212 ...
## $ Picophytoplankton : num 1 2 1.52 1.52 1.98 ...
## $ Filamentous.diatoms: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Ostracods : num 0 0 0 0.0187 0 ...
## $ Harpacticoids : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bacteria : num 2.15 1.97 1.79 1.61 1.43 ...
The Protozoa variable contains some numbers with comman as the decimal separator. This creates a question about what dataset was used for the original analyses, as it could not have been this one.
spp.abund$Protozoa <- as.numeric(str_replace(spp.abund$Protozoa, ",", "."))
Format the dates as dates
spp.abund$Date <- dmy(spp.abund$Date)
Ooops… R assumes the experiment was done in the 21st century. Shouldn’t matter too much.
Check dates match the Day.number (should give true):
sum(spp.abund$Day.number == 1+as.numeric((spp.abund$Date - spp.abund$Date[1]) / 24 / 60 / 60)) == length(spp.abund$Date)
## [1] TRUE
Check for duplicate dates:
spp.abund$Date[duplicated(spp.abund$Date)]
## [1] "2096-10-28 UTC"
which(duplicated(spp.abund$Date))
## [1] 702
Original dataset contains a duplicated date: 28/10/1996 (row 709 and 710 in excel sheet). Lets change the date in row 709 to 26/10/1996, which will put it half way between the two surrounding dates:
which(spp.abund$Date==ymd("2096-10-28 UTC"))
## [1] 701 702
spp.abund$Date[701] <- ymd("2096-10-26 UTC")
Check dates match the Day.number (should give true):
sum(spp.abund$Day.number == 1+as.numeric((spp.abund$Date - spp.abund$Date[1]) / 24 / 60 / 60)) == length(spp.abund$Date)
## [1] FALSE
Fix the Day.number problem:
spp.abund$Day.number <- 1+as.numeric((spp.abund$Date - spp.abund$Date[1]) / 24 / 60 / 60)
Data is in wide format, so change it to long:
spp.abund <- gather(spp.abund, "variable", "value", 3:12)
str(spp.abund)
## 'data.frame': 8030 obs. of 4 variables:
## $ Date : POSIXct, format: "2090-07-12" "2090-07-17" ...
## $ Day.number: num 1 6 7 8 9 12 13 15 16 19 ...
## $ variable : Factor w/ 10 levels "Cyclopoids","Calanoid.copepods",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 0 0 0.0353 0 0.0353 ...
Bring in the nutrient data:
nuts <- read.csv(text=getURL("https://raw.githubusercontent.com/opetchey/RREEBES/master/Beninca_etal_2008_Nature/data/nutrients_original.csv"), skip=7, header=T)
#nuts <- read.csv("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/nutrients_original.csv", skip=7, header=T)
nuts <- select(nuts, -X, -X.1)
nuts <- nuts[-349:-8163,]
nuts$Date <- dmy(nuts$Date)
nuts <- select(nuts, -NO2, -NO3, -NH4)
nuts$Date[duplicated(nuts$Date)]
## character(0)
which(duplicated(nuts$Date))
## integer(0)
nuts <- gather(nuts, "variable", "value", 3:4)
str(nuts)
## 'data.frame': 696 obs. of 4 variables:
## $ Date : POSIXct, format: "2090-09-18" "2090-09-24" ...
## $ Day.number: int 69 75 82 90 96 103 110 117 124 131 ...
## $ variable : Factor w/ 2 levels "Total.dissolved.inorganic.nitrogen",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 28.32 20.84 11.15 15.5 5.92 ...
Now put the two datasets together
all.data <- rbind(spp.abund, nuts)
Now select only the date range used in the Nature paper. From the supplment The analysis in Benincà et al. (Nature 2008) covered all data from 16/06/1991 until 20/10/1997. (Remembering dates in the R dataframes are 2090s.)
all.data <- filter(all.data, Date>=dmy("15/06/2091") & Date<=dmy("21/10/2097"))
#all.data[all.data$Date==dmy("16/06/2091"),]
#all.data[all.data$Date==dmy("20/10/2097"),]
(No attempt to reproduce Figure 1a, as its a food web diagram.)
First quick go:
ggplot(all.data, aes(x=Day.number, y=value)) + geom_line() +
facet_wrap(~variable, scales="free_y")
The code from here on needs modifying, as the object names are Owen’s old ones, and need changing to those used above.
Now we add a column that gives the variable types, same as in figure 1b through 1g. First make a lookup table giving species type:
tt <- data.frame(variable=unique(all.data$variable),
Type=c("Cyclopoids", "Herbivore", "Herbivore", "Herbivore",
"Phytoplankton", "Phytoplankton", "Phytoplankton",
"Detritivore", "Detritivore", "Bacteria", "Nutrient", "Nutrient"))
tt
## variable Type
## 1 Cyclopoids Cyclopoids
## 2 Calanoid.copepods Herbivore
## 3 Rotifers Herbivore
## 4 Protozoa Herbivore
## 5 Nanophytoplankton Phytoplankton
## 6 Picophytoplankton Phytoplankton
## 7 Filamentous.diatoms Phytoplankton
## 8 Ostracods Detritivore
## 9 Harpacticoids Detritivore
## 10 Bacteria Bacteria
## 11 Total.dissolved.inorganic.nitrogen Nutrient
## 12 Soluble.reactive.phosphorus Nutrient
And add the Type variable to the new dataset:
all.data <- merge(all.data, tt)
First lets set the colours as in the original:
species.colour.mapping <- c("Cyclopoids"="pink",
"Calanoid.copepods"="red",
"Rotifers"="blue",
"Protozoa"="green",
"Nanophytoplankton"="red",
"Picophytoplankton"="black",
"Filamentous.diatoms"="green",
"Ostracods"="lightblue",
"Harpacticoids"="purple",
"Bacteria"="black",
"Total.dissolved.inorganic.nitrogen"="red",
"Soluble.reactive.phosphorus"="black")
Next change the order of the levels in the Type variable, so plots appear in the same order as in the original figure:
all.data$Type <- factor(all.data$Type, levels=c("Cyclopoids", "Herbivore", "Phytoplankton", "Nutrient",
"Bacteria", "Detritivore"))
Now a version that doesn’t try to recreate the “gap” in the y axes of the original figures:
g1 <- qplot(as.numeric(Day.number), value, col=variable, data=all.data) +
facet_wrap(~Type, ncol=2, scales="free_y") +
geom_point() + geom_line() +
scale_colour_manual(values = species.colour.mapping)
g1
Looks reasonably good.
Now a version that approximates the “gap”, by removing data above it:
an2 <- filter(all.data, Type=="Cyclopoids" & value<0.6 |
Type=="Herbivore" & value<13 |
Type=="Phytoplankton" & value<1400 |
Type=="Nutrient" & value<50 |
Type=="Bacteria" & value<10 |
Type=="Detritivore" & value<0.7)
g1 <- qplot(as.numeric(Day.number), value, col=variable, data=an2) +
facet_wrap(~Type, ncol=2, scales="free_y") +
geom_point() + geom_line() +
scale_colour_manual(values = species.colour.mapping)
g1
Difficult it look like the data go off the top of the graph in ggplot.
Try logarithmic y-axes:
g1 <- qplot(as.numeric(Day.number), log10(value+0.00001), col=variable, data=all.data) +
facet_wrap(~Type, ncol=2, scales="free_y") +
geom_point() + geom_line() +
scale_colour_manual(values = species.colour.mapping)
g1
Try fourth root, as this is used in the subsequent stats.
g1 <- qplot(as.numeric(Day.number), value^0.25, col=variable, data=all.data) +
facet_wrap(~Type, ncol=2, scales="free_y") +
geom_point() + geom_line() +
scale_colour_manual(values = species.colour.mapping)
g1
Now we need to work with transformed data. Details of the transformation, copied from the Supplmentary information are in indented quote style in the following sections… looks like this:
- Transformation of the time series. We transformed the original time series, shown in Fig. 1b-g of the main text, to obtain stationary time series with equidistant data and homogeneous units of measurement. The transformation steps are illustrated for the bacteria (Fig. S1).
Aside: The ELE supplement contains the raw data and the transformed data, in separate data sheets. I (Owen) also got the interpolated data from Stephen Ellner directly.
First, the time series were interpolated using cubic hermite interpolation, to obtain data with equidistant time intervals of 3.35 days (Fig. S1a).
Make a sequence of times at which to interpolate.
#aggregate(Day.number ~ variable, all.data, min)
#aggregate(Day.number ~ variable, all.data, max)
#xout <- seq(343.35, 2657.2, by=3.35)
xout <- seq(341.7, 2653.1, by=3.35)
range(xout)
## [1] 341.70 2649.85
all.data1 <- na.omit(all.data)
group_by(all.data1, variable) %>% summarise(out=min(Day.number))
## Source: local data frame [12 x 2]
##
## variable out
## 1 Cyclopoids 340
## 2 Calanoid.copepods 340
## 3 Rotifers 340
## 4 Protozoa 340
## 5 Nanophytoplankton 340
## 6 Picophytoplankton 340
## 7 Filamentous.diatoms 340
## 8 Ostracods 340
## 9 Harpacticoids 340
## 10 Bacteria 340
## 11 Total.dissolved.inorganic.nitrogen 341
## 12 Soluble.reactive.phosphorus 341
mt <- plyr::dlply(all.data1,
"variable",
function(xx) pracma::interp1(x=xx$Day.number,
y=xx$value,
xi=xout,
method="cubic"))
## Aside: the duplicated date that was previously fixed was only discovered by a warning message
## given by the pracma::interp1 function!!!
mt <- as.data.frame(mt)
mt <- cbind(Day.number=xout, mt)
mt <- gather(mt, variable, value, 2:13)
#ggplot(mt, aes(x=Day.number, y=value)) + facet_wrap(~variable, scales="free") + geom_line()
Check this against the data direct from Steve:
#from.steve <- read.csv("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/direct from Steve/interp_short_allsystem_newnames.csv")
from.steve <- read.csv(text=getURL("https://raw.githubusercontent.com/opetchey/RREEBES/boost_by_owen/Beninca_etal_2008_Nature/data/direct%20from%20Steve/interp_short_allsystem_newnames.csv"), header=T)
from.steve <- gather(from.steve, Species, Abundance, 2:13)
names(from.steve) <- c("Day.number", "variable", "value")
g1 <- ggplot(mt, aes(x=as.numeric(Day.number), y=value)) +
facet_wrap(~variable, ncol=2, scales="free_y") +
geom_line(size=0.5, col="black") +
scale_colour_manual(values = species.colour.mapping)
g2 <- geom_line(data=from.steve, aes(x=Day.number, y=value), col="red")
g1 + g2
Looks very good.
Next, because the original time series showed many sharp spikes, the time series were rescaled using a fourth-root power transformation (Fig. S1b). The sharp spikes bias “direct method” estimates of the Lyapunov exponent, because nearby pairs of reconstructed state vectors mostly occurred in the troughs between spikes. The average rate of subsequent trajectory divergence from these pairs is therefore an estimate of the local Lyapunov exponent in the troughs, which may be very different from the global Lyapunov exponent. By making spikes and troughs more nearly symmetric, the power transformation resulted in a much more even spread of nearby state vector pairs across the full range of the data for all functional groups in the food web. The transformation is also useful for fitting nonlinear models of the deterministic skeleton (used for nonlinear predictability and indirect method estimates of the Lyapunov exponent), which was done by least squares and therefore is most efficient when error variances are stabilized. Fourth-root transformation is intermediate between the square-root transformation that would approximately stabilize the measurement error variance in count data from random subsamples, and the log transformation that is usually recommended for stabilizing process noise variance due to stochastic variation in birth and death rates.
mt$fr.value <- mt$value^0.25
The time series were then detrended using a Gaussian kernel with a bandwidth of 300 days (red line in Fig. S1b), to obtain stationary time series. Most species did not show long-term trends, except for the bacteria, detritivores (ostracods and harpacticoid copepods), dissolved inorganic nitrogen and soluble reactive phosphorus. One possible explanation for these trends in the microbial loop could be the slow accumulation of refractory organic material in the mesocosm, but we have not measured this component.
ww.td <- filter(mt, variable=="Total.dissolved.inorganic.nitrogen" |
variable=="Soluble.reactive.phosphorus" |
variable=="Bacteria" |
variable=="Ostracods" |
variable=="Harpacticoids")
## and to not detrend
ww.ntd <- filter(mt, variable!="Total.dissolved.inorganic.nitrogen" &
variable!="Soluble.reactive.phosphorus" &
variable!="Bacteria" &
variable!="Ostracods" &
variable!="Harpacticoids")
## detrend:
ww1 <- group_by(ww.td, variable) %>%
mutate(trend=ksmooth(Day.number,fr.value,bandwidth=300,kernel="normal")$y)
ww1$dt.value <- ww1$fr.value-ww1$trend
#ww1 <- select(ww1, trend)
## don't detrend
ww2 <- ww.ntd
ww2$trend <- 0
ww2$dt.value <- ww2$fr.value
## rejoin
detrended <- rbind(ww1, ww2)
Finally, the time series were linearly rescaled to have zero mean and a standard deviation of 1 (Fig. S1c).
(Note that this standardisation is not done in the code sent by Stephen. Probably shouldn’t make a difference in the GAMs?)
## standardise
#final <- group_by(detrended, variable) %>%
# mutate(y=as.numeric(scale(dt.value)))
#summarise(final, mean=mean(y), sd=sd(y))
## or don't standardise
final <- detrended
final$y <- final$dt.value
summarise(final, mean=mean(y), sd=sd(y))
## Source: local data frame [12 x 3]
##
## variable mean sd
## 1 Cyclopoids 0.2768795061 0.2243968
## 2 Calanoid.copepods 0.5542284301 0.4665221
## 3 Rotifers 0.5609051093 0.4526836
## 4 Protozoa 0.4276544332 0.4257087
## 5 Nanophytoplankton 0.4653786246 0.3362051
## 6 Picophytoplankton 0.6848727412 0.5659768
## 7 Filamentous.diatoms 0.4569957928 0.8320991
## 8 Ostracods -0.0006408208 0.2203757
## 9 Harpacticoids -0.0015795559 0.2147656
## 10 Bacteria -0.0003572609 0.1349343
## 11 Total.dissolved.inorganic.nitrogen -0.0022362304 0.3018032
## 12 Soluble.reactive.phosphorus 0.0033387271 0.2752587
## save up to here
#save.image("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/up_to_transform.Rdata")
#load("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/up_to_transform.Rdata")
We now have a y variable to work with in data.frame “final”, variable name “y”.
glimpse(final)
## Observations: 8280
## Variables:
## $ Day.number (dbl) 341.70, 345.05, 348.40, 351.75, 355.10, 358.45, 361...
## $ variable (fctr) Ostracods, Ostracods, Ostracods, Ostracods, Ostrac...
## $ value (dbl) 0.0000000000, 0.0000000000, 0.0000000000, 0.0000000...
## $ fr.value (dbl) 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00000...
## $ trend (dbl) 0.03645012, 0.03672370, 0.03700287, 0.03727249, 0.0...
## $ dt.value (dbl) -0.03645012, -0.03672370, -0.03700287, -0.03727249,...
## $ y (dbl) -0.03645012, -0.03672370, -0.03700287, -0.03727249,...
The time series of cyclopoid copepods, protozoa, filamentous diatoms, harpacticoid copepods and ostracods contained long sequences of zero values. This does not imply that these species were absent from the food web during these periods, but that their concentrations were below the detection limit. Time series dominated by many zeros can bias the statistical analysis. Therefore, these time series were shortened to remove long sequences of zero values, before the data transformation. The transformed data of all species in the food web are shown in Figure S2.
This is not done. May need to be done only for analyses for Table 1.
Choose a species to plot:
soi <- "Rotifers"
Raw and interpolated data:
g1 <- ggplot(filter(all.data, variable==soi), aes(x=Day.number, y=value)) +
facet_wrap(~variable, ncol=2, scales="free_y") +
geom_point(size=1, col="black") + geom_line(size=0.1) +
scale_colour_manual(values = species.colour.mapping) + ggtitle("Raw and interpolated data")
g2 <- geom_line(data=filter(final, variable==soi), aes(x=Day.number, y=value), size=0.25, col="blue")
g1 + g2
## Warning in loop_apply(n, do.ply): Removed 1 rows containing missing values
## (geom_point).
Fourth root transformed with trend:
g1 <- ggplot(filter(final, variable==soi), aes(x=Day.number, y=fr.value)) +
facet_wrap(~variable, ncol=2, scales="free_y") +
geom_point(size=0.5, col="black") + geom_line(size=0.1) +
scale_colour_manual(values = species.colour.mapping) + ggtitle("Quarter power trans. and trend")
g2 <- geom_line(data=filter(final, variable==soi), aes(x=Day.number, y=trend), size=0.25, col="blue")
g1 + g2
Detrended and normalised:
g1 <- ggplot(filter(final, variable==soi), aes(x=Day.number, y=y)) +
facet_wrap(~variable, ncol=2, scales="free_y") +
geom_point(size=0.5, col="black") + geom_line(size=0.1) +
scale_colour_manual(values = species.colour.mapping) + ggtitle("Detrended and normalised")
g1
Now take a look at the transformed data provided in the ELE Supplement, in the data sheet transformed_data_Nature2008.
First note that the data in the ELE supplement include a data point at day 2658.1, whereas the data above finish at 2654.85. This is because the real data end at 2658, and the interpolation method above doesn’t want to create data outside the range of the original data.
Looks good. Apart from when there were series of zeros removed, presumably before the standardisation.
# Raw spectrum
spectra <- final %>% group_by(variable) %>% do(spectra = spectrum(ts(data=.$y, end=2650.15, deltat=3.35), log='no', method="pgram", detrend=F, plot=F))
spec <- spectra %>% do(data.frame(spec = .$spec[[2]], freq = .$spec[[1]], group = .[[1]]))
ggplot(spec, aes(y=spec, x=1/freq, group=group)) + geom_line() + facet_wrap(~group) +
coord_cartesian(ylim=c(0,40), xlim=c(0,240))
freq.est <- spec %>% group_by(group) %>% mutate(max_spec = max(spec), freq = freq)
## Warning: Grouping rowwise data frame strips rowwise nature
freq.est <- subset(freq.est, max_spec==spec, select=c(freq,group))
freq.est$freq <- 1/freq.est$freq
freq.est
## Source: local data frame [12 x 2]
## Groups: group
##
## freq group
## 1 804.00000 Cyclopoids
## 2 2412.00000 Calanoid.copepods
## 3 2412.00000 Rotifers
## 4 2412.00000 Protozoa
## 5 1206.00000 Nanophytoplankton
## 6 2412.00000 Picophytoplankton
## 7 1206.00000 Filamentous.diatoms
## 8 160.80000 Ostracods
## 9 402.00000 Harpacticoids
## 10 219.27273 Bacteria
## 11 70.94118 Total.dissolved.inorganic.nitrogen
## 12 160.80000 Soluble.reactive.phosphorus
# Welch's periodogram
wspectra <- final %>% group_by(variable) %>% do(spectra = pwelch(ts(data=.$y, end=2650.15, deltat=3.35), window=5, method="pgram", plot=F))
wspec <- wspectra %>% do(data.frame(spec = .$spec[[2]], freq = .$spec[[1]], group = .[[1]]))
ggplot(wspec, aes(y=spec, x=1/freq, group=group)) + geom_line() + facet_wrap(~group) +
coord_cartesian(ylim=c(0.1,100), xlim=c(0,240))+
scale_y_continuous(trans="log")
freq.est <- wspec %>% group_by(group) %>% mutate(max_spec = max(spec), freq = freq)
## Warning: Grouping rowwise data frame strips rowwise nature
freq.est <- subset(freq.est, max_spec==spec, select=c(freq,group))
freq.est$freq <- 1/freq.est$freq
frequency(final$y)
## [1] 1
ts <- as.ts(final$y, frequency = 0.3)
#time(ts)
Create dataset with zeros removed for this table (note that this is probably not how Beninca et al did this):
final_nozeros <- final
final_nozeros$y <- ifelse(final_nozeros$y!=0, final_nozeros$y, NA)
# make it wide format
#library(tidyr)
final_long <- final_nozeros[final_nozeros$variable!="Cyclopoids" & final_nozeros$variable!="Filamentous.diatoms",c(1,2,7)]
final_wide <- spread(final_long, variable,y)
# we also removed data on Cyclopoids, not used in the table
Calculate correlation coefficients:
cor.coefs <- cor(final_wide[,c(-1)])
Only keep the upper triangle of the cor.pvals matrix:
for(i in 1:10){
for(j in 1:10){
cor.coefs[i,j] <- ifelse(i<j, cor.coefs[i,j], NA)
}}
Get p-vals too:
# https://stat.ethz.ch/pipermail/r-help/2005-July/076050.html
pn <- function(X){crossprod(!is.na(X))}
cor.prob <- function(X){
# Correlations Below Main Diagonal
# Significance Tests with Pairwise Deletion
# Above Main Diagonal
# Believe part of this came from Bill Venables
pair.SampSize <- pn(X)
above1 <- row(pair.SampSize) < col(pair.SampSize)
pair.df <- pair.SampSize[above1] - 2
R <- cor(X, use="pair")
above2 <- row(R) < col(R)
r2 <- R[above2]^2
Fstat <- (r2 * pair.df)/(1 - r2)
R[above2] <- 1 - pf(Fstat, 1, pair.df)
R
}
cor.pvals <- cor.prob(final_wide[,c(-1)])
Only keep the upper triangle of the cor.pvals matrix:
for(i in 1:10){
for(j in 1:10){
cor.pvals[i,j] <- ifelse(i<j, cor.pvals[i,j], NA)
}}
Add significance “stars” to cor.coefs from cor.pvals
cor.stars <- cor.pvals
cor.stars <- ifelse(cor.pvals<0.0001, "***",
ifelse(cor.pvals<0.001, "**",
ifelse(cor.pvals<0.05, "*", "")))
cor.cp <- cor.coefs
for(i in 1:10){
for(j in 1:10){
cor.cp[i,j] <- paste(round(cor.coefs[i,j],3), cor.stars[i,j])
}}
Remove NAs:
for(i in 1:10){
for(j in 1:10){
cor.cp[i,j] <- ifelse(cor.cp[i,j]=="NA NA", "", cor.cp[i,j])
}}
for(i in 1:10){
for(j in 1:10){
cor.cp[i,j] <- ifelse(i==j, "1", cor.cp[i,j])
}}
colnames(cor.cp) <- c("Calanoid.copepods ","Rotifers ","Protozoa ","Nanophytoplankton ","Picophytoplankton ",
"Ostracods ","Harpacticoid.copepods ","Bacteria ","Nitrogen ","Phosphorus ")
rownames(cor.cp)<-colnames(cor.cp)
Make it a table:
library(knitr)
table1b <- kable(cor.cp, format="html", col.names = colnames(cor.cp), align="c",
caption="Table 1.'Correlations between the species in the food web. Table entries show the product–moment correlation coefficients, after transformation of the data to stationary time series (see Methods). Significance tests were corrected for multiple hypothesis testing by calculation of adjusted P values using the false discovery rate.' Significant correlations are indicated as follows: *: P<0.05; **: P<0.01; ***: P<0.001. 'The correlation between calanoid copepods and protozoa could not be calculated, because their time series did not overlap. Filamentous diatoms and cyclopoid copepods were not included in the correlation analysis, because their time series contained too many zeros.' (Beninca et al. 2008)")
table1b
| Calanoid.copepods | Rotifers | Protozoa | Nanophytoplankton | Picophytoplankton | Ostracods | Harpacticoid.copepods | Bacteria | Nitrogen | Phosphorus | |
|---|---|---|---|---|---|---|---|---|---|---|
| Calanoid.copepods | 1 | NA * | NA *** | NA | NA | NA | NA * | NA ** | NA | NA |
| Rotifers | 1 | NA | NA *** | NA | NA *** | NA ** | NA *** | NA | NA *** | |
| Protozoa | 1 | NA * | NA | NA | NA | NA | NA * | NA | ||
| Nanophytoplankton | 1 | NA * | NA | NA | NA * | NA | NA | |||
| Picophytoplankton | 1 | NA | NA | NA * | NA | NA | ||||
| Ostracods | 1 | 0.416 *** | 0.081 * | -0.093 * | 0.144 ** | |||||
| Harpacticoid.copepods | 1 | 0.067 | -0.056 | 0.101 * | ||||||
| Bacteria | 1 | 0.066 | 0.183 *** | |||||||
| Nitrogen | 1 | 0.083 * | ||||||||
| Phosphorus | 1 |
# differs from the one published by Beninca et al.!
This will be done after getting Lyapunov exponents by the indirect method, as the model for that is used here (we believe).
Estimate the Lyapunov exponents of the time series, via time-delayed embedding. The Nature report used the Tisean software, which was available from CRAN until mid 2014. Based on this, and being a bit less well integrated with R, we’ll instead use the tseriesChaos package, which was largely inspired by the TISEAN project.
Unclear if this was performed on untransformed or transformed data. First try with the transformed data. Time delay (1), embedding dimension (6), and Theiler window (50) were used in the Nature report. Other parameters are chosen rather randomly, though don’t seem to matter too much!
time.delay <- 1
embedding.dimension <- 6
Theiler.window <- 50
Note that a time step is 3.35 days in the transformed data. So to get a graph with 80 days on the x-axis (as in Figure 3 in the Nature report), we need 80/3.35 = 24 time steps for the calculation of Lyapunov exponents.
time.steps <- 24
Remove the species that were not analysed in the Nature report, due to too many zeros in the time series:
led <- filter(tr, variable!="Filamentous.diatoms",
variable!="Protozoa",
variable!="Cyclopoids")
Get the data for the graphs:
all.species <- unique(as.character(led$variable))
diverg <- matrix(NA, time.steps, length(all.species))
colnames(diverg) <- all.species
for(i in 1:length(all.species)) {
print(all.species[i])
tr.fs <- filter(final, variable==all.species[i])$y
diverg[,i] <- as.numeric(try(lyap_k(tr.fs,
m=embedding.dimension,
d=time.delay,
k=10, # number of considered neighbours 20
ref=100, # number of points to take into account 100
t=Theiler.window,
s=time.steps,
eps=10 # radius where to find nearest neighbours 10
)))
}
## [1] "Bacteria"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Calanoid.copepods"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Harpacticoids"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Nanophytoplankton"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Ostracods"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Picophytoplankton"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Rotifers"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Soluble.reactive.phosphorus"
## Finding nearests
## Keeping 100 reference points
## Following points
## [1] "Total.dissolved.inorganic.nitrogen"
## Finding nearests
## Keeping 100 reference points
## Following points
## a bit of a fudge with the translation to days
diverg <- as.data.frame(cbind(days=1:time.steps, diverg))
diverg <- gather(diverg, Species, Difference, 2:10)
diverg$days <- diverg$days*3.35
#str(diverg)
Next calculate the Lyapunov exponents, noting that 6 or 7 points were used in the regressions in the Nature report
diverg$Difference[is.na(diverg$Difference)] <- 0
diverg$Difference[is.infinite(diverg$Difference)] <- 0
diverg.short <- filter(diverg, days<24) ## 24 is about 6 steps, after initial gap
LEs <- group_by(diverg.short, Species) %>%
summarise(le=coef(lm(Difference[1:6] ~ days[1:6]))[2])
#pval=summary(lm(Difference[1:6] ~ days[1:6]))$coefficients[2,4])
Then plot the graphs with LE:
LEs <- mutate(LEs, days=20, Difference=-0.5)
g1 <- ggplot(diverg, aes(x=days, y=Difference)) + geom_point() + facet_wrap(~Species) +
geom_text(data=LEs, aes(label=round(le,3)), group=NULL)
g1
Not exactly the same at Figure 3 in the Nature report. Qualitatively the same, except for where the time-delayed embedding failed.
The functions used in the code below are based on code received from Stephen Ellner. The modifications have been tested, and produce the same results as Ellner’s original code.
The function needs a matrix, X, with species abundances in wide format. Be careful to work on the unstandardised data (or standardised, if you wish) (comment out appropriate lines here.
## use next line to work on unstandardised data
final.to.melt <- final[, c("variable", "dt.value", "Day.number")]
## use next line to work on standardised
#final.to.melt <- final[, c("variable", "y", "Day.number")]
names(final.to.melt)[1] <- "Species"
melted <- melt(final.to.melt, id=c("Species", "Day.number"))
X <- acast(melted, formula= Day.number ~ Species)
#str(X)
X <- as.data.frame(X)
Restrict range of data appropriately:
## Select the time period to use
start.longest=334; start.longer=808; start.shorter=1035;
e=as.numeric(row.names(X)) > start.longer; X=X[e,];
e=as.numeric(row.names(X)) < 2654; X=X[e,];
Load and run the functions:
# read script lines from website
script <- getURL("https://raw.githubusercontent.com/opetchey/RREEBES/boost_by_owen/Beninca_etal_2008_Nature/report/functions/indirect_method_functions.R", ssl.verifypeer = FALSE)
# parase lines and evealuate in the global environement
eval(parse(text = script))
LE <- Get_GLE_Beninca(X)
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## This is mgcv 1.8-6. For overview type 'help("mgcv-package")'.
##
## Attaching package: 'mgcv'
##
## The following object is masked from 'package:pracma':
##
## magic
LE[[1]]
## [1] 0.00969605
This is quite far from the number using code and data from Steve (0.08415112). But recall that the functions have been carefully checked. Probably it is the data going into this function. A final step would be to save the data here, and take it into Steve’s function. (This point is an issue on github.)
## [1] "Bacteria"
## [2] "Calanoid.copepods"
## [3] "Cyclopoids"
## [4] "Filamentous.diatoms"
## [5] "Harpacticoids"
## [6] "Nanophytoplankton"
## [7] "Ostracods"
## [8] "Picophytoplankton"
## [9] "Protozoa"
## [10] "Rotifers"
## [11] "Soluble.reactive.phosphorus"
## [12] "Total.dissolved.inorganic.nitrogen"
## save up to here
#save.image("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/up_to_LE_indirect.Rdata")
#load("~/Dropbox (Dept of Geography)/RREEBES/Beninca_etal_2008_Nature/data/up_to_LE_indirect.Rdata")
Need to predict until about 40 days ahead = 40 / 3.35 time steps = 12 time steps.
From each time in the series, predict 12 steps ahead. Put results in an array: time x species, start.location time will be 12 species will be 12 start location will be length of time series - 12
x <- X
time.to.predict <- 12
nn <- length(x[,1])-time.to.predict
preds <- array(NA, c(12, 12, nn))
dimnames(preds) <- list(dist=1:12,
species=names(x),
start.time=1:nn)
all.species <- names(all.gams)
for(i in 1:length(all.species))
preds[1,i,] <- predict(all.gams[[i]])[1:nn]
Now for the predictions at t+2 from predictions at t+1
pred.time <- 2
for(pred.time in 2:12) {
for(i in 1:length(all.species))
preds[pred.time,i,pred.time:nn] <- predict(all.gams[[i]],
newdata=as.data.frame(t(preds[pred.time-1,,])))[pred.time:nn]
}
cors <- matrix(NA, 12, 12)
dimnames(cors) = list(dist=1:12,
species=names(x))
for(i in 1:12) {
for(j in 1:12) {
cors[j,i] <- cor(X[1:nn,i], preds[j, i, ], use="complete.obs")^2
}}
plot(X[1:nn,"Bacteria"],preds[i, "Bacteria", ])
cors <- data.frame(dist=as.numeric(I(rownames(cors))), cors)
cors.long <- gather(as.data.frame(cors), key=dist)
names(cors.long) <- c("Prediction_distance", "Variable", "Correlation")
str(cors.long)
## 'data.frame': 144 obs. of 3 variables:
## $ Prediction_distance: num 1 2 3 4 5 6 7 8 9 10 ...
## $ Variable : Factor w/ 12 levels "Cyclopoids","Calanoid.copepods",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Correlation : num 0.002091 0.093028 0.407679 0.000714 0.058554 ...
ggplot(cors.long, aes(x=Prediction_distance, y=Correlation)) +
geom_point() +
facet_wrap(~Variable, scales="free_y" )
Clearly not quite right. Still, skeleton is working. Just needs bugs ironing out!